* This program prepares the transaction data for merge with HMDA data and then performs the merge

clear
set mem 2g
set more off
log using match, replace


**mortgage deed data mentioned in readme file.
use transaction_data
keep year id tract loanamount1 loanamount1a lendername name_code MortSale
save temp1, replace
clear

* First Merge based on perfect match
use hmda
keep year mort_id tract loanamount1 loanamount1a lendername name_code
joinby year tract loanamount1 lendername using temp1

sort year mort_id
qui by year mort_id: gen obs=_N
tab obs
sort year id
qui by year id: gen obs1=_N
tab obs1
drop if obs1>1
drop if obs>1
keep year mort_id id
gen match_code=1
save matched1, replace
clear

clear
use temp1
merge 1:1 year id using matched1
drop if _merge>1
drop _merge mort_id
save temp_trans, replace
clear

****************************************************************************************************

* Fix Names that correspond to subsidiaries that are owned by other company or might be listed as a bank instead of mortgage company

use temp_trans

gen lendername1=lendername
* These are subsidararies of Wells Fargo
replace lendername="WELLS FARGO BANK, NA" if lendername=="GREATER ATLANTA FINANCIAL SERVICES"
replace lendername="WELLS FARGO BANK, NA" if lendername=="AMERICAN PRIORITY MORTGAGE LLC"

* These are others
replace lendername="SUNTRUST MORTGAGE, INC" if lendername=="NATIONAL BANK OF COMMERCE"
replace lendername="CHASE MANHATTAN MORTGAGE CORP." if lendername=="CHASE MANHATTAN BANK USA, NA"
replace lendername="GMAC BANK" if lendername=="GMAC MORTGAGE LLC"
replace lendername="HSBC MORTGAGE CORP" if lendername=="DECISION ONE MORTGAGE"
replace lendername="1ST FINANCIAL BANK" if lendername=="FIRST FINANCIAL"
replace lendername="1ST NATIONAL BANK" if lendername=="FIRST NATIONAL BANK"
replace lendername="1ST TRUST BANK FOR SAVINGS" if lendername=="FIRST TRUST BANK"
replace lendername="OPTION ONE MORTGAGE CORP." if lendername=="H&R BLOCK"
replace lendername="OHIO SAVINGS BANK" if lendername=="AMTRUST BANK"
replace lendername="NATIONAL CITY BANK" if lendername=="FIRST FRANKLIN CORPORATION"
replace lendername="CITIBANK" if lendername=="CITI MORTGAGE"
replace lendername="FIRST TENNESSEE BANK NA" if lendername=="FIRST HORIZON HOME LOAN CORP"
replace lendername="WACHOVIA MORTGAGE" if lendername=="WACHOVIA BANK"
save temp,replace
clear

use hmda
merge 1:1 year mort_id using matched1
keep if _merge==1
drop _merge id
 keep year mort_id tract loanamount1 loanamount1a lendername name_code
joinby year tract loanamount1 lendername using temp
sort year mort_id
qui by year mort_id: gen obs=_N
tab obs
sort year id
qui by year id: gen obs1=_N
tab obs1
drop if obs>1
drop if obs1>1
keep year mort_id id

gen match_code=2
save matched2, replace
clear

use matched1
append using matched2
save match, replace
clear

use temp1
merge 1:1 year id using match
drop if _merge>1
drop _merge mort_id
save temp_trans, replace
clear


***************************************************************************************************

* Using name code
use hmda
drop name_code
merge 1:1 year mort_id using match
keep if _merge==1
drop _merge id
drop if lendername==""
gen name_code=soundex(lendername)
keep year mort_id tract loanamount1 loanamount1a lendername name_code
joinby year tract loanamount1 name_code using temp_trans
sort year mort_id
qui by year mort_id: gen obs=_N
tab obs
sort year id
qui by year id: gen obs1=_N
tab obs1
drop if obs>1
drop if obs1>1
keep year mort_id id

gen match_code=3
save matched3, replace
clear

use match
append using matched3
save match, replace
clear

use temp1
merge 1:1 year id using match
drop if _merge>1
drop _merge mort_id
save temp_trans, replace
clear


*****************************************************************************************************************

* Dropping MortSale=0 to get better matches where there are multiple matches
use temp_trans
drop if MortSale==0
save temp2, replace
clear

use hmda
merge 1:1 year mort_id using match
keep if _merge==1
drop _merge id
keep year mort_id tract loanamount1 loanamount1a lendername name_code
joinby year tract loanamount1 lendername using temp2

sort year mort_id
qui by year mort_id: gen obs=_N
tab obs
sort year id
qui by year id: gen obs1=_N
tab obs1
drop if obs1>1
drop if obs>1
keep year mort_id id
gen match_code=4
save matched4, replace
clear

use match
append using matched4
save match, replace
clear


use temp1
merge 1:1 year id using match
drop if _merge>1
drop _merge mort_id
save temp_trans, replace
clear

********************************************************************************************************************


* Fix Names that correspond to subsidiaries that are owned by other company or might be listed as a bank instead of mortgage company

use temp_trans
drop if MortSale==0

gen lendername1=lendername
* These are subsidararies of Wells Fargo
replace lendername="WELLS FARGO BANK, NA" if lendername=="GREATER ATLANTA FINANCIAL SERVICES"
replace lendername="WELLS FARGO BANK, NA" if lendername=="AMERICAN PRIORITY MORTGAGE LLC"

* These are others
replace lendername="SUNTRUST MORTGAGE, INC" if lendername=="NATIONAL BANK OF COMMERCE"
replace lendername="CHASE MANHATTAN MORTGAGE CORP." if lendername=="CHASE MANHATTAN BANK USA, NA"
replace lendername="GMAC BANK" if lendername=="GMAC MORTGAGE LLC"
replace lendername="HSBC MORTGAGE CORP" if lendername=="DECISION ONE MORTGAGE"
replace lendername="1ST FINANCIAL BANK" if lendername=="FIRST FINANCIAL"
replace lendername="1ST NATIONAL BANK" if lendername=="FIRST NATIONAL BANK"
replace lendername="1ST TRUST BANK FOR SAVINGS" if lendername=="FIRST TRUST BANK"
replace lendername="OPTION ONE MORTGAGE CORP." if lendername=="H&R BLOCK"
replace lendername="OHIO SAVINGS BANK" if lendername=="AMTRUST BANK"
replace lendername="NATIONAL CITY BANK" if lendername=="FIRST FRANKLIN CORPORATION"
replace lendername="CITIBANK" if lendername=="CITI MORTGAGE"
replace lendername="FIRST TENNESSEE BANK NA" if lendername=="FIRST HORIZON HOME LOAN CORP"
replace lendername="WACHOVIA MORTGAGE" if lendername=="WACHOVIA BANK"
save temp,replace
clear

use hmda
merge 1:1 year mort_id using match
keep if _merge==1
drop _merge id
keep year mort_id tract loanamount1 loanamount1a lendername name_code
joinby year tract loanamount1 lendername using temp
sort year mort_id
qui by year mort_id: gen obs=_N
tab obs
sort year id
qui by year id: gen obs1=_N
tab obs1
drop if obs>1
drop if obs1>1
keep year mort_id id

gen match_code=5
save matched5, replace
clear

use match
append using matched5
save match, replace
clear

use temp1
merge 1:1 year id using match
drop if _merge>1
drop _merge mort_id
save temp_trans, replace
clear


***************************************************************************************************

* Using name code
use temp_trans
drop if MortSale==0
save temp2, replace
clear

use hmda
drop name_code
merge 1:1 year mort_id using match
keep if _merge==1
drop _merge id
drop if lendername==""
gen name_code=soundex(lendername)
keep year mort_id tract loanamount1 loanamount1a lendername name_code
joinby year tract loanamount1 name_code using temp2
sort year mort_id
qui by year mort_id: gen obs=_N
tab obs
sort year id
qui by year id: gen obs1=_N
tab obs1
drop if obs>1
drop if obs1>1
keep year mort_id id

gen match_code=6
save matched6, replace
clear

use match
append using matched6
save match, replace
clear

use temp1
merge 1:1 year id using match
drop if _merge>1
drop _merge mort_id
save temp_trans, replace
clear


******************************************************************************************************************
* Based on perfect name match and rounded loan amount

use temp_trans
rename loanamount1 loanamount2
rename loanamount1a loanamount1
save temp, replace
clear

use hmda
merge 1:1 year mort_id using match
keep if _merge==1
drop _merge id
joinby year tract loanamount1 lendername using temp
sort year mort_id
qui by year mort_id: gen obs=_N
tab obs
sort year id
qui by year id: gen obs1=_N
tab obs1
drop if obs>1
drop if obs1>1
keep year mort_id id

gen match_code=7
save matched7, replace
clear

use match
append using matched7
save match, replace
clear

use temp1
merge 1:1 year id using match
drop if _merge>1
drop _merge mort_id
save temp_trans, replace
clear



******************************************************************************************************************


* Fix Names that correspond to subsidiaries that are owned by other company or might be listed as a bank instead of mortgage company

use temp_trans

gen lendername1=lendername
* These are subsidararies of Wells Fargo
replace lendername="WELLS FARGO BANK, NA" if lendername=="GREATER ATLANTA FINANCIAL SERVICES"
replace lendername="WELLS FARGO BANK, NA" if lendername=="AMERICAN PRIORITY MORTGAGE LLC"

* These are others
replace lendername="SUNTRUST MORTGAGE, INC" if lendername=="NATIONAL BANK OF COMMERCE"
replace lendername="CHASE MANHATTAN MORTGAGE CORP." if lendername=="CHASE MANHATTAN BANK USA, NA"
replace lendername="GMAC BANK" if lendername=="GMAC MORTGAGE LLC"
replace lendername="HSBC MORTGAGE CORP" if lendername=="DECISION ONE MORTGAGE"
replace lendername="1ST FINANCIAL BANK" if lendername=="FIRST FINANCIAL"
replace lendername="1ST NATIONAL BANK" if lendername=="FIRST NATIONAL BANK"
replace lendername="1ST TRUST BANK FOR SAVINGS" if lendername=="FIRST TRUST BANK"
replace lendername="OPTION ONE MORTGAGE CORP." if lendername=="H&R BLOCK"
replace lendername="OHIO SAVINGS BANK" if lendername=="AMTRUST BANK"
replace lendername="NATIONAL CITY BANK" if lendername=="FIRST FRANKLIN CORPORATION"
replace lendername="CITIBANK" if lendername=="CITI MORTGAGE"
replace lendername="FIRST TENNESSEE BANK NA" if lendername=="FIRST HORIZON HOME LOAN CORP"
replace lendername="WACHOVIA MORTGAGE" if lendername=="WACHOVIA BANK"
rename loanamount1 loanamount2
rename loanamount1a loanamount1
save temp, replace
clear

use hmda
merge 1:1 year mort_id using match
keep if _merge==1
drop _merge id

joinby year tract loanamount1 lendername using temp
sort year mort_id
qui by year mort_id: gen obs=_N
tab obs
sort year id
qui by year id: gen obs1=_N
tab obs1
drop if obs>1
drop if obs1>1
keep year mort_id id

gen match_code=8
save matched8, replace
clear

use match
append using matched8
save match, replace
clear

use temp1
merge 1:1 year id using match
drop if _merge>1
drop _merge mort_id
save temp_trans, replace
clear

***************************************************************************************************

* Name Code and rounded loan amount
use temp_trans
rename loanamount1 loanamount2
rename loanamount1a loanamount1
save temp, replace
clear


use hmda
drop name_code
merge 1:1 year mort_id using match
keep if _merge==1
drop _merge id
drop if lendername==""
gen name_code=soundex(lendername)
joinby year tract loanamount1 name_code using temp

sort year mort_id
qui by year mort_id: gen obs=_N
tab obs
sort year id
qui by year id: gen obs1=_N
tab obs1
drop if obs>1
drop if obs1>1
keep year mort_id id

gen match_code=9
save matched9, replace
clear

use match
append using matched9
save match, replace
clear

use temp1
merge 1:1 year id using match
drop if _merge>1
drop _merge mort_id
save temp_trans, replace
clear


*********************************************************************************************************************

* Match on Close match on Loan Amount

use hmda
merge 1:1 year mort_id using match
keep if _merge==1
drop _merge id
rename loanamount1a loanamount2a
rename loanamount1 loanamount2
joinby year tract lendername using temp_trans
gen ldiff1=abs(loanamount1-loanamount2)
drop if ldiff1>2
sort year mort_id
qui by year mort_id: gen obs=_N
tab obs
sort year id
qui by year id: gen obs1=_N
tab obs1
drop if obs>1
drop if obs1>1
keep year mort_id id

gen match_code=10
save matched10, replace

clear
use match
append using matched10
save match, replace
clear

use temp1
merge 1:1 year id using match
drop if _merge>1
drop _merge mort_id
save temp_trans, replace
clear


****************************************************************************************************************************
**************************************
* merge with transaction and hmda data to check matches based on soundex

use match
merge 1:1 year mort_id using hmda
drop if _merge~=3
drop _merge 
rename lendername lname_hmda
save temp, replace
merge 1:1 year id using transaction_data
drop if _merge~=3
rename lendername lname_trans
keep year mort_id id  match_code occupancy loanamount1 applicantethnicity coapplicantethnicity applicantrace1 ///
applicantrace2 applicantrace3 applicantrace4 coapplicantrace1 coapplicantrace1 coapplicantrace2 coapplicantrace3 ///
coapplicantrace4 coapplicantrace5 applicantsex coapplicantsex applicantincome lname_hmda taxpid buyerborro lname_trans ///
Owner MortSale x y fips fips1 blockg tract2 tract
save mort_income, replace

merge 1:1 year id using name_code_clean
tab match_code _merge
drop if _merge==2
drop if nomatch==1
* drop if match_code==5 & _merge==1
* drop if match_code==6 & _merge==1
* drop if match_code>7
drop _merge nomatch

label var year "year of hmda data/transaction data"
label var mort_id "unique mortgage id"
label var match_code "match_code"
label var id "unique buyer id"
save mort_income, replace

